mean(hukou_matrix$hukou_2, na.rm = TRUE),
mean(hukou_matrix$hukou_1, na.rm = TRUE)),
sd = c(0,
0,
std.error(hukou_matrix$hukou_2, na.rm = TRUE),
std.error(hukou_matrix$hukou_1, na.rm = TRUE)
),
Attributes = c("Rural", "Urban", "Rural", "Urban"),
dataset = c("China Internet", "China Internet",
"Our Sample", "Our Sample"))
# Graph
descriptive_location <- ggplot(Hukou, aes(x = Attributes, y = Values, fill = dataset)) +
geom_bar(stat="identity", position=position_dodge(), color="black")+
scale_fill_grey()+
geom_errorbar(aes(ymin=Values-1.96*sd, ymax=Values+1.96*sd), width=.2,
position=position_dodge(.9))+
theme_minimal()+
xlab("Location")+
ylab("Proportion")+
coord_cartesian(ylim = c(0, 1))+
theme(legend.position = "top",
legend.title=element_blank(),
text = element_text(family = "Times", size = 12),
axis.text = element_text(family = "Times", size = 11));descriptive_location
# Visualize the results
descriptive_location <- ggplot(Hukou, aes(x = Attributes, y = Values, fill = dataset)) +
geom_bar(stat = "identity", position = position_dodge(), color = "black")+
scale_fill_grey() +
geom_errorbar(aes(ymin = Values - 1.96*sd, ymax = Values + 1.96*sd), width = .2,
position = position_dodge(.9))+
theme_minimal() +
xlab("Location") +
ylab("Proportion") +
coord_cartesian(ylim = c(0, 1)) +
theme(legend.position = "top",
legend.title = element_blank(),
text = element_text(family = "Times", size = 12),
axis.text = element_text(family = "Times", size = 11))
# Female
female_matrix <- df %>% select(female.x) %>% dummy_cols()
#################################################
### Replication Code for Study 2              ###
### Title: Mimicking Democracy                ###
### Authors: Hsu Yumin Wang, Eddy S. F. Yeung ###
### Version: April 26, 2024                   ###
#################################################
### Set-up ----
## Clear the work environment and set the working directory
rm(list = ls())
setwd("~/Desktop/redist-propaganda/replication/Study 2")
## Load the required packages
library(tidyverse)   # version 2.0.0
library(estimatr)    # version 1.0.0
library(parameters)  # version 0.15.0
library(texreg)      # version 1.37.5
library(haven)       # version 2.4.3
library(car)         # version 3.0-12
library(TOSTER)      # version 0.4.1
library(interflex)   # version 1.2.6
library(mice)        # version 3.14.0
library(psych)       # version 2.1.9
library(pscl)        # version 1.5.5
library(gridExtra)   # version 2.3
library(lemon)       # version 0.4.7
library(cowplot)     # version 1.1.1
## Read the dataset
df <- read.csv("main_data.csv")
### Code individual variables and impute the covariates ----
## Code individual variables
# Age (18-29 = 1; 60+ = 5)
table(df$age)
# Female (= 1)
df$female <- ifelse(df$gender == 2, 1, 0)
table(df$female)
# Education (1 = primary school or below; postgraduate degree = 6)
df$educ <- ifelse(df$educ == 6 | df$educ == 7, 6, df$educ)
table(df$educ)
# Income (1 = below 500 RMB; 10 = above 20,000 RMB)
table(df$income)
# CCP membership
df$ccp <- ifelse(df$party == 3, 1, 0)
table(df$ccp)
# policy trust
table(df$policy_trust)
# Screeners (estimated based on 2-parameter IRT models)
df$screener1_correct <- ifelse(df$screener1 == "3,5", 1, 0)
df$screener2_correct <- ifelse(df$screener2 == "5,11", 1, 0)
df$screener3_correct <- ifelse(df$TIPI_X == 3, 1, 0)
df$screener4_correct <- ifelse(df$TIPI_Y == 1, 1, 0)
df_IRT <- dplyr::select(df,
screener1_correct, screener2_correct,
screener3_correct, screener4_correct)
df_IRT <- rollcall(df_IRT,
yea = 1, nay = 0, missing = c(NA), notInLegis = 9,
legis.names = NULL, vote.names = NULL,
legis.data = NULL, vote.data = NULL,
desc = NULL, source = NULL)
set.seed(1)
model_screeners <- ideal(df_IRT,
store.item = T, normalize = T,
maxiter = 10000, thin = 10)
df$attentiveness <- model_screeners$xbar[ , 1]
# Table S4
df_temp <- subset(df, group == 0 | group == 1 | group == 2)
summary(df_temp$screener1_correct)
sd(df_temp$screener1_correct)
summary(df_temp$screener2_correct)
sd(df_temp$screener2_correct)
summary(df_temp$screener3_correct)
sd(df_temp$screener3_correct, na.rm = TRUE)
summary(df_temp$screener4_correct)
sd(df_temp$screener4_correct, na.rm = TRUE)
# Political knowledge (estimated based on 2-parameter IRT models)
df$know1_correct <- ifelse(df$know1 == 3, 1, 0)
df$know2_correct <- ifelse(df$know2 == 4, 1, 0)
df$know3_correct <- ifelse(df$know3 == 2, 1, 0)
df$know4_correct <- ifelse(df$know4 == 1, 1, 0)
df_know_IRT <- dplyr::select(df,
know1_correct, know2_correct,
know3_correct, know4_correct)
df_know_IRT <- rollcall(df_know_IRT,
yea = 1, nay = 0, missing = c(NA), notInLegis = 9,
legis.names = NULL, vote.names = NULL,
legis.data = NULL, vote.data = NULL,
desc = NULL, source = NULL)
set.seed(1)
model_knowledge <- ideal(df_know_IRT,
store.item = T, normalize = T,
maxiter = 10000, thin = 10)
df$knowledge <- model_knowledge$xbar[ , 1]
# Personality: agreeableness
df$agreeableness <- ((6 - df$TIPI_2) + df$TIPI_7) / 2
table(df$agreeableness)
# Personality: conscientiousness
df$conscientiousness <- (df$TIPI_3 + (6 - df$TIPI_8)) / 2
table(df$conscientiousness)
# Personality: extraversion
df$extraversion <- (df$TIPI_1 + (6 - df$TIPI_6)) / 2
table(df$extraversion)
# Social-equity understanding of democracy (estimated based on 2-parameter IRT models)
df$define1_social <- ifelse(df$define1 == 1, 1, 0)
df$define2_social <- ifelse(df$define2 == 1, 1, 0)
df$define3_social <- ifelse(df$define3 == 1, 1, 0)
df$define4_social <- ifelse(df$define4 == 1, 1, 0)
df_define_IRT <- dplyr::select(df,
define1_social, define2_social,
define3_social, define4_social)
df_define_IRT <- rollcall(df_define_IRT,
yea = 1, nay = 0, missing = c(NA), notInLegis = 9,
legis.names = NULL, vote.names = NULL,
legis.data = NULL, vote.data = NULL,
desc = NULL, source = NULL)
set.seed(1)
model_understanding <- ideal(df_define_IRT,
store.item = T, normalize = T,
maxiter = 10000, thin = 10)
df$understanding <- model_understanding$xbar[ , 1]
# Table S6
table(df$define1_social)
table(df$define1_social) %>% prop.table()
table(df$define2_social)
table(df$define2_social) %>% prop.table()
table(df$define3_social)
table(df$define3_social) %>% prop.table()
table(df$define4_social)
table(df$define4_social) %>% prop.table()
# Prior influence by state messaging (1 = likely pretreated; 0 = unlikely)
df$pretreat_news <- ifelse(df$pretreat_news == 8, 0, df$pretreat_news)
df$pretreated <- ifelse(df$pretreat_news > median(df$pretreat_news, na.rm = T) &
df$pretreat_eval > median(df$pretreat_eval, na.rm = T),
1, 0)
table(df$pretreated)
# Covid satisfaction
table(df$covid)
table(df$covid) %>% prop.table()
# Treatment conditions
table(df$group)
# Democratic legitimacy (mean effects index with imputation)
cal_MEI <-
function(Z, outcome_mat, to_reorient, reorient = F, greedy = T, impute = F) {
if(impute == T) {
R <- 1 * is.na(outcome_mat)
means_for_imputation <-
rbind(apply(outcome_mat[Z == 0, ], MAR = 2, FUN = mean, na.rm = T),
apply(outcome_mat[Z == 1, ], MAR = 2, FUN = mean, na.rm = T),
apply(outcome_mat[Z == 2, ], MAR = 2, FUN = mean, na.rm = T))
to_impute <- R * means_for_imputation[Z + 1, ]
outcome_mat[is.na(outcome_mat)] <- 0
outcome_mat <- outcome_mat + to_impute
}
c_mean <- apply(X = outcome_mat[Z == 0, ], MARGIN = 2, FUN = mean, na.rm = T)
c_sd <- apply(X = outcome_mat[Z == 0, ], MARGIN = 2, FUN = sd, na.rm = T)
z_score <- t(t(sweep(outcome_mat, 2, c_mean)) / c_sd)
index_numerator <- rowSums(z_score)
if(greedy == T) {
n_outcomes <- rowSums(!is.na(z_score))
}
else if(greedy == F){
n_outcomes <- ncol(outcome_mat)
}
index <- index_numerator / n_outcomes
index <- (index - mean(index[Z == 0], na.rm = T)) / sd(index[Z == 0], na.rm = T)
return(index)
}
vio_matrix <- cbind(df$demo1, df$demo2, 10 - df$demo3)
set.seed(1)
df <- df %>%
mutate(democraticness = cal_MEI(Z = df$group, outcome_mat = vio_matrix,
impute = T))
# Satisfaction with the government's performance in promoting social equity
# (-3 = very dissatisfied, 3 = very satisfied)
df <- df %>%
mutate(satisfaction = case_when(
equity2a == 1 ~ 2,
equity2a == 2 ~ 3,
equity2b == 1 ~ -2,
equity2b == 2 ~ -3,
equity2c == 1 ~ 1,
equity2c == 2 ~ -1,
equity2c == 3 ~ 0
))
table(df$satisfaction)
# Perceived extent to which the government cares about improving social equity
# (-3 = does not care a great deal; 3 = cares a great deal)
df <- df %>%
mutate(care = case_when(
care2a == 1 ~ 2,
care2a == 2 ~ 3,
care2b == 1 ~ -2,
care2b == 2 ~ -3,
care2c == 1 ~ 1,
care2c == 2 ~ -1,
care2c == 3 ~ 0
))
table(df$care)
# Evaluation of rule of law development in China
# (-3 = very not well-developed; 3 = very well-developed)
df <- df %>%
mutate(rule_of_law = case_when(
law2a == 1 ~ 2,
law2a == 2 ~ 3,
law2b == 1 ~ -2,
law2b == 2 ~ -3,
law2c == 1 ~ 1,
law2c == 2 ~ -1,
law2c == 3 ~ 0
))
table(df$rule_of_law)
# Evaluation of foreign affairs management in China
# (-3 = very not well-managed; 3 = very well-managed)
df <- df %>%
mutate(foreign_aff = case_when(
foreign2a == 1 ~ 2,
foreign2a == 2 ~ 3,
foreign2b == 1 ~ -2,
foreign2b == 2 ~ -3,
foreign2c == 1 ~ 1,
foreign2c == 2 ~ -1,
foreign2c == 3 ~ 0
))
table(df$foreign_aff)
# Evaluation of environmental protection in China
# (-3 = very dissatisfied, 3 = very satisfied)
df <- df %>%
mutate(envir_protect = case_when(
envir2a == 1 ~ 2,
envir2a == 2 ~ 3,
envir2b == 1 ~ -2,
envir2b == 2 ~ -3,
envir2c == 1 ~ 1,
envir2c == 2 ~ -1,
envir2c == 3 ~ 0
))
table(df$envir_protect)
## Impute the covariates
df_cov <- df %>%
dplyr::select(ResponseId, age, female, educ, income, ccp, knowledge,
policy_trust, agreeableness, extraversion, conscientiousness,
attentiveness)
df_cov$age <- as.factor(df_cov$age)
df_cov$educ <- as.factor(df_cov$educ)
init <- mice(df_cov, maxit = 0, seed = 1)
meth <- init$method
predM <- init$predictorMatrix
imputed_data <- mice(df_cov, method = meth, predictorMatrix = predM, seed = 1)
df_imp <- complete(imputed_data, 1)
df <- df %>% left_join(df_imp, by = c("ResponseId"))
temp <- df
### Sample characteristics ----
library(fastDummies) # version 1.7.3
library(plotrix)     # version 3.8-2
## Location (i.e., Hukou type)
# Create a series of dummy variables to facilitate calculating mean and std
hukou_matrix <- df %>% select(hukou) %>% dummy_cols()
# Make a dataframe
Hukou <- data.frame(Values = c(0.304,
0.696,
mean(hukou_matrix$hukou_2, na.rm = TRUE),
mean(hukou_matrix$hukou_1, na.rm = TRUE)),
sd = c(0,
0,
std.error(hukou_matrix$hukou_2, na.rm = TRUE),
std.error(hukou_matrix$hukou_1, na.rm = TRUE)
),
Attributes = c("Rural", "Urban", "Rural", "Urban"),
dataset = c("China Internet", "China Internet",
"Our Sample", "Our Sample"))
# Visualize the results
descriptive_location <- ggplot(Hukou, aes(x = Attributes, y = Values, fill = dataset)) +
geom_bar(stat = "identity", position = position_dodge(), color = "black")+
scale_fill_grey() +
geom_errorbar(aes(ymin = Values - 1.96*sd, ymax = Values + 1.96*sd), width = .2,
position = position_dodge(.9))+
theme_minimal() +
xlab("Location") +
ylab("Proportion") +
coord_cartesian(ylim = c(0, 1)) +
theme(legend.position = "top",
legend.title = element_blank(),
text = element_text(family = "Times", size = 12),
axis.text = element_text(family = "Times", size = 11))
## Female
female_matrix <- df %>% select(female.x) %>% dummy_cols()
# Make a dataframe
Gender <- data.frame(Values = c(0.51,
0.49,
mean(female_matrix$female.x_0, na.rm = TRUE),
mean(female_matrix$female.x_1, na.rm = TRUE)),
sd = c(0,
0,
std.error(female_matrix$female.x_0, na.rm = TRUE),
std.error(female_matrix$female.x_1, na.rm = TRUE)
),
Attributes = c("Male", "Female", "Male", "Female"),
dataset = c("China Internet", "China Internet",
"Our Sample", "Our Sample"))
# Visualize the results
descriptive_gender <- ggplot(Gender, aes(x = Attributes, y = Values, fill = dataset)) +
geom_bar(stat = "identity", position = position_dodge(), color = "black") +
geom_errorbar(aes(ymin = Values - 1.96*sd, ymax = Values + 1.96*sd), width = .2,
position = position_dodge(.9)) +
scale_fill_grey() +
theme_minimal() +
xlab("Gender") +
ylab("") +
coord_cartesian(ylim = c(0, 1)) +
theme(legend.position = "top",
legend.title = element_blank(),
text = element_text(family = "Times", size = 12),
axis.text = element_text(family = "Times", size = 11))
## Age
# First, recode the values for comparisons in alignment with China Internet
df$age.x[df$age.x == 3] <- 2
df$age.x[df$age.x == 4] <- 3
df$age.x[df$age.x == 5] <- 3
# Make a dataframe
Age <- data.frame(Values = c(0.382, 0.391, 0.228,
mean(age_matrix$age.x_1, na.rm = TRUE),
mean(age_matrix$age.x_2, na.rm = TRUE),
mean(age_matrix$age.x_3, na.rm = TRUE)),
sd = c(0,
0,
0,
std.error(age_matrix$age.x_1, na.rm = TRUE),
std.error(age_matrix$age.x_2, na.rm = TRUE),
std.error(age_matrix$age.x_3, na.rm = TRUE)),
Attributes = c("< 30", "30-49", "50+", "< 30", "30-49", "50+"),
dataset = c("China Internet", "China Internet", "China Internet",
"Our Sample", "Our Sample", "Our Sample"))
# Create a series of dummy variables to facilitate calculating mean and std.error
age_matrix <- df %>% select(age.x) %>% dummy_cols()
# Make a dataframe
Age <- data.frame(Values = c(0.382, 0.391, 0.228,
mean(age_matrix$age.x_1, na.rm = TRUE),
mean(age_matrix$age.x_2, na.rm = TRUE),
mean(age_matrix$age.x_3, na.rm = TRUE)),
sd = c(0,
0,
0,
std.error(age_matrix$age.x_1, na.rm = TRUE),
std.error(age_matrix$age.x_2, na.rm = TRUE),
std.error(age_matrix$age.x_3, na.rm = TRUE)),
Attributes = c("< 30", "30-49", "50+", "< 30", "30-49", "50+"),
dataset = c("China Internet", "China Internet", "China Internet",
"Our Sample", "Our Sample", "Our Sample"))
# Visualize the results
descriptive_age <- ggplot(Age, aes(x = Attributes, y = Values, fill = dataset)) +
geom_bar(stat = "identity", position = position_dodge(), color = "black") +
geom_errorbar(aes(ymin = Values - 1.96*sd, ymax = Values + 1.96*sd), width = .2,
position = position_dodge(.9)) +
scale_fill_grey() +
theme_minimal() +
xlab("Age") +
ylab("") +
coord_cartesian(ylim = c(0, 1)) +
theme(legend.position = "top",
legend.title = element_blank(),
text = element_text(family = "Times", size = 12),
axis.text = element_text(family = "Times", size = 11))
## Education
# First, recode the values for comparisons in alignment China Internet census data
df$educ.x[df$educ.x == 6] <- 5
# Create a series of dummy variables to facilitate calculating mean and std.error
edu_matrix <- df %>% select(educ.x) %>% dummy_cols()
# Make a dataframe
Education <- data.frame(Values = c(0.192,
0.405,
0.215,
0.1,
0.088,
mean(edu_matrix$educ.x_1, na.rm = TRUE),
mean(edu_matrix$educ.x_2, na.rm = TRUE),
mean(edu_matrix$educ.x_3, na.rm = TRUE),
mean(edu_matrix$educ.x_4, na.rm = TRUE),
mean(edu_matrix$educ.x_5, na.rm = TRUE)),
sd = c(0,
0,
0,
0,
0,
std.error(edu_matrix$educ.x_1, na.rm = TRUE),
std.error(edu_matrix$educ.x_2, na.rm = TRUE),
std.error(edu_matrix$educ.x_3, na.rm = TRUE),
std.error(edu_matrix$educ.x_4, na.rm = TRUE),
std.error(edu_matrix$educ.x_5, na.rm = TRUE)
),
Attributes = c("Primary school or below", "Junior middle school",
"Senior middle school", "Three-year college",
"University or above", "Primary school or below",
"Junior middle school", "Senior middle school",
"Three-year college", "University or above"),
dataset = c("China Internet", "China Internet", "China Internet",
"China Internet", "China Internet", "Our Sample",
"Our Sample", "Our Sample", "Our Sample", "Our Sample"))
# Visualize the results
Education$Attributes <- factor(Education$Attributes,
levels = c("Primary school or below", "Junior middle school",
"Senior middle school", "Three-year college",
"University or above"))
descriptive_education <- ggplot(Education, aes(x = Attributes, y = Values, fill = dataset)) +
geom_bar(stat = "identity", position = position_dodge(), color = "black") +
geom_errorbar(aes(ymin = Values - 1.96*sd, ymax = Values + 1.96*sd), width = .2,
position = position_dodge(.9)) +
scale_fill_grey() +
theme_minimal() +
xlab("Education Level") +
ylab("Proportion") +
coord_cartesian(ylim = c(0, 1)) +
theme(legend.position = "top",
legend.title = element_blank(),
text = element_text(family = "Times", size = 12),
axis.text = element_text(family = "Times", size = 11))
# Putting all together
nt <- theme(legend.position = "none")
descriptive_comparison <-
grid_arrange_shared_legend(descriptive_education,
arrangeGrob(
descriptive_location + nt,
descriptive_age + nt,
descriptive_gender + nt,
ncol = 3),
ncol = 1, nrow = 2)
ggsave("descriptive_comparison.pdf", descriptive_comparison, width = 10, height = 8)
### Assessing differential attrition ----
## Focus only on respondents who gave consent and were in Control or Redistributionist Propaganda group
df_filter <- df %>% filter(consent == 1, group != 2)
## Ability of covariates to predict whether respondents finished the survey
reg1 <- lm_robust(Finished ~ age.x + female.x + educ.x + income.x + ccp.x +
knowledge.x + policy_trust.x + agreeableness.x + extraversion.x +
conscientiousness.x + attentiveness.x,
data = df_filter)
summary(reg1) # F(11, 1414) = 1.319, p = 0.207
## Ability of covariates to predict treatment status, conditional on finishing the survey
reg2 <- lm_robust(group ~ age.x + female.x + educ.x + income.x + ccp.x +
knowledge.x + policy_trust.x + agreeableness.x + extraversion.x +
conscientiousness.x + attentiveness.x,
data = df_filter, subset = Finished == 1)
summary(reg2) # F(11, 1397) = 0.6123, p = 0.820
## Generate a regression table (Table S5)
texreg(list(reg1, reg2),
include.ci = FALSE, stars = numeric(0), digits = 3,
custom.header = list("Finished the Survey (= 1)" = 1, "Assigned to Treatment (= 1)" = 2),
custom.note = "Entries are OLS estimates with robust standard errors in parentheses.",
fontsize = "small")
## Attrition timeline (Figure S9)
# devtools::install_github('lbassan/attritevis')
library(attritevis) # version 0.1.0
library(waffle)     # version 1.0.2
library(ggpubr)     # version 0.4.0
df_filter <- df %>% filter(consent == 1 & group != 2)
df_filter$treatment <- df_filter$group
df_filter$treatment[df_filter$treatment == 0] <- "Control"
df_filter$treatment[df_filter$treatment == 1] <- "Treatment"
attrition <- df_filter[ , c(17:53, 54:58, 119:123)]
pretreatment <- attrition %>% select(1:37)
posttreatment <- attrition %>% select(38:47)
attrition <- cbind(pretreatment, df_filter$treatment, posttreatment)
colnames(attrition)[38] <- "Treatment_given"
attrition <- attrition[ , -c(11, 13, 15, 17)]
colnames(attrition)[2] <- "age"
colnames(attrition)[3] <- "educ"
colnames(attrition)[9] <- "income"
colnames(attrition)[31] <- "policy_trust"
attrition_plot <-
attritevis::plot_attrition(data = attrition,
freq = FALSE,
treatment_q = "Treatment_given",
outcome_q = "demo1_NPS_GROUP",
mycolors = c(Control = "#000066",
Treatment = "#CC0033"),
total = FALSE,
tline = FALSE)
